In [1]:
library(data.table)

In [2]:
library(ggplot2)

Time difference between edits for human edits (using gradient boosting): precision-recall and ROC curves


In [3]:
# Read the tab-separated precision/recall file (first row is the header)
# and wrap the resulting data frame in a data.table.
gradient_boosting_precision_and_recall <- data.table(
  read.table(
    file = "../../python_analysis_scripts/data_edit_analyses/used_for_model_iteration_2/gradient_boosting_PR_I2_for_anonymous_user_sessions.tsv",
    header = TRUE,
    sep = "\t"
  )
)

In [4]:
# Per-column summary statistics of the loaded precision/recall table.
summary(object = gradient_boosting_precision_and_recall)


   precision           recall      
 Min.   :0.05173   Min.   :0.0000  
 1st Qu.:0.06764   1st Qu.:0.9770  
 Median :0.11575   Median :0.9916  
 Mean   :0.21353   Mean   :0.9481  
 3rd Qu.:0.23502   3rd Qu.:0.9972  
 Max.   :1.00000   Max.   :1.0000  

In [5]:
# Read the tab-separated ROC file (first row is the header) and wrap the
# resulting data frame in a data.table.
gradient_boosting_roc <- data.table(
  read.table(
    file = "../../python_analysis_scripts/data_edit_analyses/used_for_model_iteration_2/gradient_boosting_ROC_I2_for_anonymous_user_sessions.tsv",
    header = TRUE,
    sep = "\t"
  )
)

In [6]:
# Per-column summary statistics of the loaded ROC table.
summary(object = gradient_boosting_roc)


 false_positives  true_positives     
 Min.   :0.0000   Min.   :0.0006974  
 1st Qu.:0.1144   1st Qu.:0.9714086  
 Median :0.3909   Median :0.9923291  
 Mean   :0.3750   Mean   :0.9398940  
 3rd Qu.:0.5792   3rd Qu.:0.9972106  
 Max.   :1.0000   Max.   :1.0000000  

In [7]:
# Precision-recall curve: precision (y) against recall (x), drawn as a
# line, with axis tick labels and axis titles enlarged to 2.4x the
# inherited text size.
# NOTE(review): geom_line() connects points in order of x, not in row
# order; confirm this is intended (geom_path() follows row order).
ggplot(
  data = gradient_boosting_precision_and_recall,
  mapping = aes(x = recall, y = precision)
) +
  geom_line() +
  theme(
    axis.text = element_text(size = rel(2.4)),
    axis.title = element_text(size = rel(2.4))
  )



In [8]:
# ROC curve: true_positives (y) against false_positives (x), drawn as a
# line; the axes are relabelled as rates and the axis tick labels and
# titles are enlarged to 2.4x the inherited text size.
# NOTE(review): geom_line() connects points in order of x, not in row
# order; confirm this is intended (geom_path() follows row order).
ggplot(
  data = gradient_boosting_roc,
  mapping = aes(x = false_positives, y = true_positives)
) +
  geom_line() +
  labs(x = "false positive rate", y = "true positive rate") +
  theme(
    axis.text = element_text(size = rel(2.4)),
    axis.title = element_text(size = rel(2.4))
  )



In [ ]: